* Session setup for the sample-construction statistics script.
* Pin command behaviour to Stata 16.1 and start from an empty session
* (-clear all- also drops previously defined programs, so the -program-
* definitions below can be re-run without -program drop-).
version 16.1
clear all
* NOTE(review): "MYPATH" looks like a placeholder for the project root --
* confirm it is substituted before this file is run.
cd "MYPATH\derived\make_master_dataset"
* Add the ado directory two levels up to the end of the ado search path.
adopath + ../../ado/

* Close any log that is still open; -cap- swallows the error when none is.
cap log close

* NOTE(review): -preliminaries- is not defined in this file; presumably it is
* found via the ado path added above and defines the globals used later
* (TEMP, DATA, RESULTS) -- confirm.
preliminaries
log using "sample_stats.log", replace

* Create the temp directory; -cap- ignores the error if it cannot be created
* (for example because it already exists).
cap mkdir "$TEMP\derived\make_master_dataset"


* Fix the random-number seed. No random-number command is visible in this
* file, so this only matters if a called command draws random numbers.
set seed 921

program main
	* Driver. Allocates the 13 x 4 matrix sample_construct (all missing),
	* lets kub, mfr_preg_merge and analysis_sample fill it in that order,
	* lists the matrix in the log, and writes it out as a LaTeX table with
	* -frmttable-.
	matrix sample_construct = J(13, 4, .)
	matrix colnames sample_construct = "Fetus-Tests" "Mothers" "Fetuses" "Pregnancies"

	* The first two stages announce completion in the log; the last does not.
	foreach stage in kub mfr_preg_merge {
		`stage'
		di "finished `stage'"
	}
	analysis_sample

	matrix list sample_construct

	* One row title per matrix row (13 in total), in fill order.
	frmttable using "${RESULTS}\derived\make_analysis_sample\sample_construct.tex", ///
		statmat(sample_construct) sdec(0) tex replace ///
		ctitles("", "Fetus-Tests", "Mothers", "Fetuses", "Pregnancies") ///
		rtitles("Full KUB testing sample" \ ///
			"Nonmissing fetus identifier" \ ///
			"Test date after Feb 1, 2009" \ ///
			"In Pregnancy Register or MBR" \ ///
			"Pregnancy Register only" \ ///
			"MBR only" \ ///
			"Both" \ ///
			"After KUB to PregReg-MBR merge" \ ///
			"Singleton pregnancies" \ ///
			"2011 through 2019" \ ///
			"Analysis Sample" \ ///
			"1/51 to 1/200 counties" \ ///
			"High risk covered counties")
end


program kub
	* Fills the first three rows of matrix sample_construct from the cleaned
	* KUB file: column 1 counts distinct observations (count_test = _n),
	* column 2 counts distinct values of lopnr.
	*   row 1: all observations
	*   row 2: foster_risk nonmissing
	*   row 3: foster_risk nonmissing and undersokningsdatum after 1 Feb 2009
	* NOTE(review): row 2 is titled "Nonmissing fetus identifier" but the
	* filter is on foster_risk -- confirm this is the intended variable.
	* Side effects: replaces the data in memory, resets global row_count to 0
	* and then advances it to 3, (re)defines matrix temp and global rwnames.
	use "${DATA}\KUB_data\cleaned_integrated_kub.dta", clear
	global row_count = 0
	gen count_test = _n

	* The starting row is the same for every column, so compute it once.
	local top = $row_count + 1
	local col = 0
	foreach id in count_test lopnr {
		local ++col

		qui unique `id'
		local n_all = r(unique)

		qui unique `id' if !missing(foster_risk)
		local n_risk = r(unique)

		qui unique `id' if !missing(foster_risk) & undersokningsdatum > td(01feb2009)
		local n_recent = r(unique)

		* Stack the three counts and drop them into the results matrix in one
		* step: assigning a matrix to a subscripted element places it with its
		* upper-left corner at that element.
		matrix temp = (`n_all' \ `n_risk' \ `n_recent')
		matrix sample_construct[`top', `col'] = temp
	}

	global row_count = $row_count + 3
	global rwnames "Full KUB testing sample \ " "Nonmissing fetus identifier \ " ///
	  "Test date after Feb 1, 2009 \ "
end

program mfr_preg_merge
	* Fills seven rows of matrix sample_construct, starting just below the row
	* offset held in global row_count.
	*
	* Stage 1 (4 rows, from gravreg.dta): counts for the whole file, for
	*   in_PregR only, for in_MFR only, and for both. Column 2 counts distinct
	*   lopnr; column 3 counts observations (fetus_count = _n).
	* Stage 2 (3 rows, from master_dataset_w_dates after dropping
	*   preg_in_KUB == 0): counts for the remaining sample, for non-multiple
	*   pregnancies, and for non-multiple pregnancies with date_preg in
	*   [1 Jan 2011, 1 Jan 2020). Columns 1, 2 and 4 count distinct test_id,
	*   lopnr and pregnancy; column 3 sums num_fetuses over one row per
	*   pregnancy.
	*
	* Side effects: replaces the data in memory, overwrites matrix temp,
	* advances global row_count by 7 and appends to global rwnames.

	* PregReg-MBR prior to merge
	use "$TEMP\derived\make_master_dataset\gravreg.dta", clear
	gen fetus_count = _n
	local column = 1
	foreach var in lopnr fetus_count {
		local row = $row_count
		local column = `column' + 1
		qui unique `var'
		local n_union = r(unique)
		qui unique `var' if in_MFR == 0 & in_PregR == 1
		local n_pregR = r(unique)
		qui unique `var' if in_MFR == 1 & in_PregR == 0
		local n_MFR = r(unique)
		qui unique `var' if in_MFR == 1 & in_PregR == 1
		local n_both = r(unique)
		matrix temp = `n_union' \ `n_pregR' \ `n_MFR' \ `n_both'
		forval i = 1/4 {
			matrix sample_construct[`row' + `i', `column'] = temp[`i', 1]
		}
	}
	global row_count = $row_count + 4
	local tempnames "In Pregnancy Register or MBR \ " "Pregnancy Register only \ " "MBR only \ " ///
	  "Both \ "
	global rwnames $rwnames `tempnames'

	* After merge
	use "${TEMP}\derived\make_master_dataset\master_dataset_w_dates", clear
	* Restrict the sample BEFORE tagging one row per pregnancy, so the tagged
	* row can never be the one that gets dropped.
	drop if preg_in_KUB == 0
	bysort pregnancy: gen fetus_count = num_fetuses if _n == 1

	* Filters shared by every column, defined once so they cannot drift apart.
	* A missing num_fetuses is treated as "not a multiple pregnancy".
	local singleton "!(num_fetuses > 1 & !mi(num_fetuses))"
	local window "date_preg >= td(01jan2011) & date_preg < td(01jan2020)"

	local column = 0
	foreach var in test_id lopnr fetus_count pregnancy {
		di "`var'"
		local row = $row_count
		local column = `column' + 1
		if "`var'" != "fetus_count" {
			* Identifier columns: number of distinct values.
			qui unique `var'
			local n_after_kub_merge = r(unique)
			qui unique `var' if `singleton'
			local n_singleton = r(unique)
			qui unique `var' if `singleton' & `window'
			local n_sing_2011_2019 = r(unique)
		}
		else {
			* Fetus column: total of the per-pregnancy fetus counts. Plain
			* -summarize- already returns r(sum); -detail- is not needed.
			qui sum `var'
			local n_after_kub_merge = r(sum)
			qui sum `var' if `singleton'
			local n_singleton = r(sum)
			qui sum `var' if `singleton' & `window'
			local n_sing_2011_2019 = r(sum)
		}
		matrix temp = `n_after_kub_merge' \ `n_singleton' \ `n_sing_2011_2019'
		matrix list temp
		forval i = 1/3 {
			matrix sample_construct[`row' + `i', `column'] = temp[`i', 1]
		}
	}
	global row_count = $row_count + 3
	local tempnames "After KUB to PregReg-MBR merge \ " "Singleton pregnancies \ " "2011 through 2019 \ "
	global rwnames $rwnames `tempnames'
end

program analysis_sample
	* Fills the last three rows of matrix sample_construct from the analysis
	* sample, starting just below the row offset held in global row_count.
	* After dropping preg_in_KUB == 0 it counts distinct lopnr (column 2) and
	* distinct pregnancy (column 4) for:
	*   row 1: the whole remaining sample
	*   row 2: lan in {1, 4, 9, 13, 14, 19, 20, 23}  (titled "1/51 to 1/200 counties")
	*   row 3: lan in {3, 18, 21, 22}                (titled "High risk covered counties")
	* Columns 1 and 3 are left missing for these rows.
	* Side effects: replaces the data in memory, overwrites matrix temp,
	* advances global row_count by 3, appends to global rwnames and displays it.
	use "$DATA\analysis_sample", clear
	drop if preg_in_KUB == 0
	qui gen nipt_51_200 = inlist(lan, 1, 4, 9, 13, 14, 19, 20, 23)
	qui gen nipt_hr_cov = inlist(lan, 3, 18, 21, 22)

	local column = 2
	foreach var in lopnr pregnancy {
		local row = $row_count
		qui unique `var'
		local n_analysis = r(unique)
		qui unique `var' if nipt_51_200 == 1
		local n_51_200 = r(unique)
		qui unique `var' if nipt_hr_cov == 1
		local n_hr = r(unique)
		matrix temp = `n_analysis' \ `n_51_200' \ `n_hr'
		forval i = 1/3 {
			matrix sample_construct[`row' + `i', `column'] = temp[`i', 1]
		}
		* Skip the Fetuses column: 2 (Mothers) -> 4 (Pregnancies).
		local column = `column' + 2
	}
	* Exactly three rows were written, so the offset advances by three and
	* ends equal to the number of rows in sample_construct.
	global row_count = $row_count + 3
	local tempnames "Analysis Sample \ " "1/51 to 1/200 counties \ " "High risk covered counties \"
	global rwnames $rwnames `tempnames'
	di "$rwnames"
end


* Execute: build and export the sample-construction table, then close the
* log opened at the top of this file so it is not left open after the run.
main
log close
